Load required packages

library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(microbiome)
library(here)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages

Source required functions

'%!in%' <- function(x,y)!('%in%'(x,y))

source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")
plot_time <- function(df, 
                      measure,
                      x = "Day_from_Inoculum", 
                      y = "value", 
                      shape = "neg",
                      fill = "Reactor_Treatment",
                      group = "Reactor_Treatment", 
                      point_size=0.5,
                      facet,
                      smooth = FALSE)
{
  df %>%
    dplyr::filter(alphadiversiy %in% measure) %>%
    dplyr::mutate(alphadiversiy = fct_reorder(alphadiversiy, value, .desc = TRUE)) %>%
    dplyr::mutate(neg = ifelse(value == 0, "neg", "pos")) %>%
    arrange(Day_from_Inoculum) %>%
    ggplot(aes_string(x = x,
                      y = y)) +
    geom_jitter(alpha=0.9, size = point_size, aes_string(color = fill, fill = fill, shape = shape),  show.legend = TRUE) + 
    geom_path(inherit.aes = TRUE, aes_string(fill = fill, color = fill, show.legend = FALSE),
              size = 0.01,
              linetype = "dashed") +
    facet_grid(as.formula(facet), scales = "free") +
    geom_vline(xintercept = c(0),
               color="black", alpha=0.4) + theme_light() -> plot
  
  if(smooth == TRUE) 
  {
    plot +
      geom_smooth(show.legend = FALSE, level = 0.95, alpha=0.005, size = 0.5 ,aes_string(color = fill, fill = fill))  -> plot
  }
  # scale_y_continuous(labels = scientific,
  #                    limits=c(1e+10, 1e+11), breaks = seq(1e+10, 1e+11, by = 1e+10),
  #                    trans = "log10") +
  
  
  return(plot + theme(legend.position = "bottom"))
}

Import phyloseq object

ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>% 
  filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>% 
  subset_samples(Enrichment == "NotEnriched") %>% 
  subset_samples(Model == "Human")  -> physeq
## Joining, by = "ASV"
physeq %>% 
  sample_data() %>% 
  data.frame() %>% 
  DT::datatable()

Rarefaction:

Curves

require(parallel)
## Loading required package: parallel
physeq %>%
  ggrare(step = 100, parallel = TRUE,  se = FALSE, 
         color = "Day_of_Treatment" , plot = FALSE) -> rare_curves
rare_curves +
  theme_classic() + 
  geom_vline(xintercept = sample_sums(physeq) %>% min(),
             color="red", 
             linetype="dashed", size=0.25) + 
  facet_wrap(~ Reactor_Treatment) + ylab("ASV Richness") -> rare_plot

rare_plot

# rare_plot %>% 
#   export::graph2ppt(append = TRUE,
#                   file = file.path(here::here("data/processed/figures_NRP72")))

chicken rarefaction was 4576, let see how it looks here.

rare_curves +
  theme_classic() + facet_null() + coord_cartesian(xlim = c(0,5000))

Rarefaction:

physeq %>% 
  rarefy_even_depth(sample.size = 4576,
                    rngseed = 123) -> physeq_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## 56 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## D-1-S49IR-13-S103IR-46-S126IR-51-S157IR-74-S188
## ...
## 657OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
physeq_rare
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 453 taxa and 144 samples ]:
## sample_data() Sample Data:        [ 144 samples by 59 sample variables ]:
## tax_table()   Taxonomy Table:     [ 453 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 453 tips and 452 internal nodes ]:
## refseq()      DNAStringSet     :      [ 453 reference sequences ]
## taxa are rows
require(parallel)

physeq_rare %>%
  ggrare(step = 50, parallel = TRUE,  se = FALSE, 
         color = "Day_of_Treatment" , plot = FALSE) -> rare_rare_curves
rare_rare_curves +
  theme_classic() + 
  geom_vline(xintercept = sample_sums(physeq_rare) %>% min(),
             color="red", 
             linetype="dashed", size=0.25) + 
  facet_wrap(~ Reactor_Treatment) + ylab("ASV Richness") -> rare_rare_plot

rare_rare_plot

# rare_rare_plot %>% 
#   export::graph2ppt(append = TRUE,
#                   file = file.path(here::here("data/processed/figures_NRP72")))

alpha_div:

Alpha diversity measures over time

physeq_rare %>%
  phyloseq_alphas(phylo = FALSE) -> alpha_df
measures_alpha = c("Observed", "diversity_shannon", "evenness_pielou")#, "PD","MNTD", "SES.MPD" ,"bNTI")

alpha_df %>%
  plot_alphas(measure = measures_alpha,
              x_group = "Reactor_Treatment",
              colour_group = "Enrichment",
              fill_group = "Enrichment",
              shape_group = "Enrichment",
              facet_group = "Reactor_Treatment",
              test_group = "Reactor_Treatment",
              test_group_2 = "Enrichment") -> alpha_out 
alpha_out$plot$data %>%
  filter(Treatment %!in% c("DONOR", "positive", "negative", "STRAIN")) %>% 
  filter(!is.na(Fermentation)) %>% 
  filter(Day_of_Treatment > -5) %>% 
  plot_time(measure = measures_alpha,
            x = "Day_of_Treatment",
            facet = c("alphadiversiy ~ Fermentation"),
            fill = "Treatment",
            group = "Treatment",
            shape = "Reactor",
            smooth = TRUE,
            point_size = 1) + 
  labs(x="Day (from Treatment)", y= "alpha-diversity",  
       col=NULL, fill = NULL, shape = NULL) + 
  guides(col = guide_legend(ncol = 3))  +
  scale_color_viridis_d(na.value = "black") +
  scale_fill_viridis_d(na.value = "black")  -> alpha_div_plot

alpha_div_plot

# alpha_div_plot %>% 
# export::graph2ppt(append = TRUE,
#                 file = file.path(here::here("data/processed/figures_NRP72")))
plotly::ggplotly(alpha_div_plot)
alpha_div_plot  + geom_boxplot(aes(group = Treatment,
                                   color =Treatment,
                                   fill = Treatment),
                               outlier.shape = NA,
                               outlier.colour = NA,
                               # outlier.shape = NA,
                               alpha = 0.2) + guides(col = guide_legend(ncol = 3)) -> alpha_div_plot

alpha_div_plot 

# alpha_div_plot %>% 
#   export::graph2ppt(append = TRUE,
#                     file = file.path(here::here("data/processed/figures_NRP72")))
alpha_out$stat %>% 
  filter(signif == "SIGN") %>% 
  arrange(alphadiversiy) %>% 
  DT::datatable()
ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>% 
  filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>% 
  subset_samples(Enrichment == "NotEnriched") %>% 
    rarefy_even_depth(sample.size = 4576,
                    rngseed = 123)  %>% 
    subset_samples(Experiment == "Continuous")  -> tmp_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## Joining, by = "ASV"
## 73 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## CR-40-S166CR-52-S196EMPTY-S384negativ-S80negative2-S181
## ...
## 351OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
tmp_rare %>%
  phyloseq_alphas(phylo = FALSE) -> alpha_df
measures_alpha = c("Observed", "diversity_shannon", "evenness_pielou")#, "PD","MNTD", "SES.MPD" ,"bNTI")

alpha_df %>%
  plot_alphas(measure = measures_alpha,
              x_group = "Reactor_Treatment",
              colour_group = "Enrichment",
              fill_group = "Enrichment",
              shape_group = "Enrichment",
              facet_group = "Reactor_Treatment",
              test_group = "Reactor_Treatment",
              test_group_2 = "Enrichment") -> alpha_out 
alpha_out$plot$data %>%
  # filter(Treatment %!in% c("DONOR", "positive", "negative", "STRAIN")) %>% 
  # filter(!is.na(Fermentation)) %>% 
  filter(Day_of_Treatment > -5, Day_of_Treatment < 20) %>%
  plot_time(measure = measures_alpha,
            x = "Day_of_Treatment",
            facet = c("alphadiversiy ~ Model + Fermentation"),
            fill = "Treatment",
            group = "Treatment",
            shape = NULL,
            smooth = TRUE,
            point_size = 1) + 
  labs(x="Day (from Treatment)", y= "alpha-diversity",  
       col=NULL, fill = NULL, shape = NULL) + 
  guides(col = guide_legend(ncol = 3))  +
  scale_color_viridis_d(na.value = "black") +
  scale_fill_viridis_d(na.value = "black")  -> alpha_div_plot

alpha_div_plot

# alpha_div_plot %>% 
# export::graph2ppt(append = TRUE,
#                 file = file.path(here::here("data/processed/figures_NRP72")))
plotly::ggplotly(alpha_div_plot)
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] metagMisc_0.0.4      reshape2_1.4.4       scales_1.1.1        
##  [4] here_1.0.1           microbiome_1.10.0    plotly_4.9.2.1      
##  [7] ampvis2_2.6.5        ggrepel_0.8.2        speedyseq_0.5.0     
## [10] phyloseq_1.34.0      forcats_0.5.0        stringr_1.4.0       
## [13] dplyr_1.0.4          purrr_0.3.4          readr_1.4.0         
## [16] tidyr_1.1.2          tibble_3.0.6         ggplot2_3.3.3       
## [19] tidyverse_1.3.0.9000
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15          colorspace_2.0-0    ggsignif_0.6.0     
##   [4] rio_0.5.16          ellipsis_0.3.1      rprojroot_2.0.2    
##   [7] XVector_0.28.0      fs_1.5.0            rstudioapi_0.13    
##  [10] ggpubr_0.4.0        farver_2.0.3        ggnet_0.1.0        
##  [13] DT_0.15             lubridate_1.7.9     xml2_1.3.2         
##  [16] codetools_0.2-16    splines_4.0.2       doParallel_1.0.16  
##  [19] knitr_1.31          ade4_1.7-16         jsonlite_1.7.2     
##  [22] Cairo_1.5-12.2      broom_0.7.2         cluster_2.1.0      
##  [25] dbplyr_1.4.4        compiler_4.0.2      httr_1.4.2         
##  [28] backports_1.2.1     assertthat_0.2.1    Matrix_1.2-18      
##  [31] lazyeval_0.2.2      cli_2.3.0           htmltools_0.5.1.1  
##  [34] prettyunits_1.1.1   tools_4.0.2         igraph_1.2.6       
##  [37] gtable_0.3.0        glue_1.4.2          Rcpp_1.0.6         
##  [40] carData_3.0-4       Biobase_2.50.0      cellranger_1.1.0   
##  [43] vctrs_0.3.6         Biostrings_2.56.0   multtest_2.44.0    
##  [46] ape_5.4-1           nlme_3.1-149        iterators_1.0.13   
##  [49] crosstalk_1.1.0.1   xfun_0.21           network_1.16.0     
##  [52] openxlsx_4.2.3      rvest_0.3.6         lifecycle_1.0.0    
##  [55] rstatix_0.6.0       zlibbioc_1.34.0     MASS_7.3-52        
##  [58] hms_1.0.0           biomformat_1.7.0    rhdf5_2.32.4       
##  [61] RColorBrewer_1.1-2  curl_4.3            yaml_2.2.1         
##  [64] stringi_1.5.3       highr_0.8           S4Vectors_0.26.1   
##  [67] foreach_1.5.1       permute_0.9-5       BiocGenerics_0.34.0
##  [70] zip_2.1.1           rlang_0.4.10        pkgconfig_2.0.3    
##  [73] evaluate_0.14       lattice_0.20-41     Rhdf5lib_1.10.1    
##  [76] patchwork_1.1.0     htmlwidgets_1.5.3   labeling_0.4.2     
##  [79] tidyselect_1.1.0    plyr_1.8.6          magrittr_2.0.1     
##  [82] R6_2.5.0            IRanges_2.22.2      generics_0.1.0     
##  [85] DBI_1.1.1           foreign_0.8-80      pillar_1.4.7       
##  [88] haven_2.3.1         withr_2.4.1         mgcv_1.8-32        
##  [91] abind_1.4-5         survival_3.2-3      car_3.0-10         
##  [94] modelr_0.1.8        crayon_1.4.1        rmarkdown_2.4      
##  [97] progress_1.2.2      grid_4.0.2          readxl_1.3.1       
## [100] data.table_1.13.6   blob_1.2.1          vegan_2.5-7        
## [103] reprex_0.3.0        digest_0.6.27       stats4_4.0.2       
## [106] munsell_0.5.0       viridisLite_0.3.0